covid_df =
read_csv("./data/p8105_final_ped_covid.csv") %>%
janitor::clean_names() %>%
mutate(county = NA,
county = as.character(county)) %>%
select(city, county, everything()) %>%
mutate(city = tolower(city)) %>%
mutate(city = str_replace(city, "n white plains", "white plains")) %>%
mutate(county = case_when(city == "bronx" ~ "bronx",
city == "brooklyn" ~ "kings",
city == "yonkers" ~ "westchester",
city == "new york" ~ "new york",
city == "mount vernon" ~ "westchester",
city == "new rochelle" ~ "westchester",
city == "white plains" ~ "westchester",
city == "ridgewood" ~ "queens",
city == "nanuet" ~ "rockland",
city == "bergenfield" ~ "bergen",
city == "ossining" ~ "westchester",
city == "monroe" ~ "orange",
city == "newburgh" ~ "orange",
city == "staten island" ~ "richmond",
city == "port chester" ~ "westchester",
city == "spring valley" ~ "rockland",
city == "irvington" ~ "westchester",
city == "flushing" ~ "queens",
city == "chappaqua" ~ "westchester",
city == "new city" ~ "rockland",
city == "ferncliff manor" ~ "westchester",
city == "greenwich" ~ "washington",
city == "haverstraw" ~ "rockland",
city == "suffern" ~ "rockland",
city == "berkeley heights" ~ "union")
) %>%
mutate(eventdatetime = as.Date(eventdatetime, "%m/%d/%Y"),
eventdatetime = format(eventdatetime, "%m-%Y"),
eventdatetime = zoo::as.yearmon(eventdatetime, "%m-%Y")) %>%
mutate(
ethnicity_race = case_when(
race == "R3 Black or African-American" ~ "black",
race == "R2 Asian" ~ "asian",
race == "R5 White" ~ "caucasian",
race == "R1 American Indian or Alaska Native" ~ "american indian",
race == "Multiple Selected" ~ "multiple",
ethnicity == "E1 Spanish/Hispanic/Latino" ~ "latino"
))
covid_df %>% view()
Here, we show average SES and average BMI Kyung: This is awesome! When both SES and BMI are selected, when I hover on the map, I see can see only average SES - I am assuming you also intend to add average BMI information as well? Individually, SES and BMI show up fine. Also, there is a lot of βNaNβ for average SES, but I assume it is due to missing data.
# Map Viz using tidycensus and tmap (use ses)
county_ny = c("bronx", "kings", "westchester", "new york", "queens", "rockland", "orange", "richmond")
county_nj = c("bergen", "union")
shape_ny =
get_acs(geography = "tract",
variables = "B19013_001",
state = "NY",
county = county_ny,
geometry = TRUE) %>%
janitor::clean_names() %>%
select(name, geometry) %>%
separate(name, into = c("county", "state"), sep = -17) %>%
mutate(state = str_sub(state, 10),
county = tolower(county),
county = sub(".*\\s", "", trimws(county)),
county = str_replace(county, "york", "new york"))
shape_nj =
get_acs(geography = "tract",
variables = "B19013_001",
state = "NJ",
county = county_nj,
geometry = TRUE) %>%
janitor::clean_names() %>%
select(name, geometry) %>%
separate(name, into = c("county", "state"), sep = -18) %>%
mutate(state = str_sub(state, 9),
county = tolower(county),
county = sub(".*\\s", "", trimws(county)))
shape_full =
rbind(shape_ny, shape_nj)
# admitted =
# covid_df %>%
# group_by(county, admitted) %>%
# summarize(count = n())
# ny_map =
# left_join(shape_ny, bmi_mean, by = "county")
bmi_mean =
covid_df %>%
group_by(county) %>%
summarize(bmi_mean = mean(bmi_value, na.rm = TRUE))
ses_mean =
covid_df %>%
group_by(county) %>%
summarize(ses_mean = mean(ses, na.rm = TRUE))
full_map_bmi =
left_join(shape_full, bmi_mean, by = "county")
full_map_ses =
left_join(shape_full, ses_mean, by = "county")
tmap_mode("view")
tm_shape(full_map_bmi) +
tm_fill(
col = "bmi_mean",
palette = "viridis",
style = "quantile",
contrast = c(0.3, 1),
title = "Average BMI",
textNA = "Not Available",
id = "state",
popup.vars=c("County: " = "county",
"Average BMI: " = "bmi_mean")) +
tm_borders(col = "white") +
tm_shape(full_map_ses) +
tm_fill(
col = "ses_mean",
palette = "RdYlBu",
style = "quantile",
contrast = c(0.3, 1),
title = "Average SES",
textNA = "Not Available",
id = "state",
popup.vars=c("County: " = "county",
"Average SES: " = "ses_mean")) +
tm_borders(col = "white") +
tm_view(
alpha = 0.85,
view.legend.position = c("right", "bottom")) +
tm_scale_bar(text.size = 1) +
tm_facets(as.layers = TRUE, sync = TRUE)
The number of pediatric patients testing positive for COVID-19 is shown as a function of time, by admission status (yes/no). The number of patients who returned positive SARS-CoV-2 RT-PCR tests peaked in late February and decreased until April. From April to June, levels of positive tests generally plateaued for both admitted and non-admitted patients until June, at which point levels dropped off to close to zero for both groups.
# Difference between admitted over the year
plot_1 =
covid_df %>%
group_by(eventdatetime, admitted) %>%
summarize(count = n()) %>%
ggplot(aes(x = eventdatetime,
y = count,
color = admitted)) +
geom_point(aes(text = paste("Date: ", eventdatetime,
"\nNumber of Counts: ", count,
"\nAdmitted: ", admitted))) +
geom_line() +
labs(title = "",
x = "Event Date",
y = "Number of Events",
color = "Admitted")
ggplotly(plot_1, tooltip = "text")
The ethnic and racial background of pediatric patients with COVID-19 infection reflect the diverse population served in the Bronx. Latinos represent the majority of patients in whom positive SARS-CoV-2 RT-PCR test results were returned, followed by blacks, caucasians, and asians. Overall, latino and black children with COVID-19 infection seem to have a lower hospitalization rates compared to caucasian and asian children.
# Race and admitted
plot_2 =
covid_df %>%
group_by(ethnicity_race, admitted) %>%
summarize(count = n()) %>%
filter(ethnicity_race != "american indian") %>%
filter(ethnicity_race != "multiple") %>%
ggplot(aes(x = fct_reorder(ethnicity_race, count),
y = count,
fill = admitted,
text = paste("Ethnicity: ", ethnicity_race,
"\nNumber of Counts: ", count,
"\nAdmitted: ", admitted))) +
geom_bar(stat="identity", position=position_dodge()) +
coord_flip() +
labs(title = "",
x = "Number of Counts",
y = "Ethnicity",
fill = "Admitted")
ggplotly(plot_2, tooltip = "text")
The below interactive plot shows plots of BMI as a function of age, stratified by race. In general, BMI values seem to increase as age increases. There are outliers with some extreme BMI values, including some with a BMI of greater than 60.
plot_3 =
covid_df %>%
filter(ethnicity_race != "multiple") %>%
ggplot(aes(x = age, y = bmi_value, color = ethnicity_race)) +
geom_point(aes(text = paste("Age: ", age,
"\nBMI: ", bmi_value,
"\nEthnicity: ", ethnicity_race))) +
geom_smooth(se = FALSE) +
labs(title = "",
x = "Age",
y = "BMI Value",
color = "Ethnicity")
ggplotly(plot_3, tooltip = "text")
The below interactive bar graph is another representation of our earlier exploratory ggplot showing the density or count of COVID-19 positivity as function of age. This information is further stratified by admission status (yes/no).
plot_4 =
covid_df %>%
mutate(age = round(age),
age = as.factor(age)) %>%
group_by(age, admitted) %>%
summarize(count = n()) %>%
ggplot(aes(x = age,
y = count,
fill = admitted,
text = paste("Age: ", age,
"\nNumber of Counts: ", count,
"\nAdmitted: ", admitted))) +
geom_bar(stat="identity", position=position_dodge()) +
labs(title = "",
x = "Age",
y = "Number of Counts",
fill = "Admitted")
ggplotly(plot_4, tooltip = "text")
The scatterplot shows BMI values versus age by admission status with a smooth curve. In general, BMI values increase as age increase for both admitted and non-admitted patients. The smooth curves generally overlap but diverge beginning at around age 14, with a trend for more admissions for extremely obese patients with high outlier BMI values.
plot_5 =
covid_df %>%
ggplot(aes(x = age, y = bmi_value, color = admitted)) +
geom_point(aes(text = paste("Age: ", age,
"\nBMI Value: ", bmi_value,
"\nAdmitted: ", admitted))) +
geom_smooth(se = FALSE) +
labs(title = "",
x = "Age",
y = "BMI Value",
color = "Admitted")
ggplotly(plot_5, tooltip = "text")
The below interactive bar graph explores the relationship between average age of COVID-19 infected children and geographic location. The average age of pediatric patients was 14.5 years in the Bronx, which returned the greatest positivity count. Staten Island had the oldest average age (22 years) and Berkeley Heights had the youngest average age (5 years), but there was only one patient each from these respective locations.
# Barplot using age / count reorder
plot_6 =
covid_df %>%
group_by(city, county) %>%
summarize(mean_age = mean(age, na.rm = TRUE),
count = n()) %>%
drop_na(city) %>%
ggplot(aes(
x = fct_reorder(city, mean_age),
# x = fct_reorder(city, count),
#tried to recorder by count, but looks funny - I like your plot better
y = mean_age,
fill = count,
text = paste("City: ", city,
"\nAvg Age: ", mean_age,
"\nCounts: ", count))) +
geom_bar(stat="identity") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
labs(title = "",
x = "City",
y = "Average Age",
fill = "Count")
ggplotly(plot_6, tooltip = "text")
# plot_ly(x = ~fct_reorder(city, mean_age), y = ~mean_age,
# type = "bar",
# color = ~county,
# colors = "viridis",
# alpha = .5) %>%
# layout(title = "",
# xaxis = list(title = ""),
# yaxis = list(title = ""),
# barmode = "stack")